# Load the exam results table
import pandas as pd
data = pd.read_csv('exam_results.csv')

# Target column (pass/fail flag for exam 3) and the two raw score columns
y = data['exam3_pass_or_not']
x1 = data['exam1']
x2 = data['exam2']

# Feature frame for a second-order model: the raw scores, their squares and
# their cross term. The column order matters because the fitted coefficients
# are unpacked positionally later in the script.
x = pd.DataFrame({
    'x1': x1,
    'x2': x2,
    'x1^2': x1.pow(2),
    'x2^2': x2.pow(2),
    'x1*x2': x1.mul(x2),
})

# Train the model
from sklearn.linear_model import LogisticRegression
# The squared and cross-term features are fed in without standardization, so
# the solver can need more than the library default of 100 iterations to
# converge. Raising the cap does not change the result when convergence is
# reached earlier; it only prevents stopping on an unconverged fit.
model = LogisticRegression(max_iter=1000)
model.fit(x, y)

# Decision-boundary parameters. coef_[0] follows the column order of x:
# x1, x2, x1^2, x2^2, x1*x2 -> theta1..theta5; theta0 is the intercept.
theta1, theta2, theta3, theta4, theta5 = model.coef_[0]
theta0 = model.intercept_[0]

# Predicted labels for the same samples the model was fitted on
prediction = model.predict(x)

# Visualization setup
import matplotlib.pyplot as plt
import numpy as np

# Re-read the score and label columns as NumPy arrays for plotting
x1 = data.loc[:, 'exam1'].to_numpy()
x2 = data.loc[:, 'exam2'].to_numpy()
y = data.loc[:, 'exam3_pass_or_not'].to_numpy()

# Boolean masks selecting the samples of each class (label 0 / label 1)
class0 = (y == 0)
class1 = (y == 1)

# The samples are deliberately not drawn here: the decision-boundary section
# below scatters both classes (with legend labels) and sets the axis labels on
# the same figure. Drawing them here as well stacked a second, identical set
# of markers on that figure, and the axis labels set here were overwritten
# before the figure was shown.

# Plot the second-order decision boundary on top of the samples

# Grid extent: the observed score range padded by 1 on each side
x1_min = x1.min() - 1
x1_max = x1.max() + 1
x2_min = x2.min() - 1
x2_max = x2.max() + 1

# 500 x 500 evaluation grid over the padded range
grid_x1 = np.linspace(x1_min, x1_max, 500)
grid_x2 = np.linspace(x2_min, x2_max, 500)
xx1, xx2 = np.meshgrid(grid_x1, grid_x2)

# Evaluate the fitted quadratic at every grid point, accumulating one term at
# a time: theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 + theta4*x2^2 + theta5*x1*x2
z = theta0 + theta1 * xx1
z = z + theta2 * xx2
z = z + theta3 * xx1 ** 2
z = z + theta4 * xx2 ** 2
z = z + theta5 * xx1 * xx2

# Draw the samples, one scatter call per class
for mask, color, label, marker in (
    (class0, 'r', 'Not Pass', 'o'),
    (class1, 'b', 'Pass', 'x'),
):
    plt.scatter(x1[mask], x2[mask], c=color, label=label, marker=marker)

# The z = 0 contour is the decision boundary
plt.contour(xx1, xx2, z, levels=[0], colors='g')

# Axis labels, legend and title, then display the figure
plt.xlabel('Exam1 Score')
plt.ylabel('Exam2 Score')
plt.legend()
plt.title('Decision Boundary')
plt.show()

# Compute the accuracy of the predictions against the true labels.
# `prediction` was produced from the same samples the model was fitted on,
# so this figure is the accuracy on the training data.
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_true=y, y_pred=prediction)
print(f'Accuracy: {accuracy}')
